% This is the main LaTeX file which is used to produce the Biopython
% Tutorial documentation.
%
% If you just want to read the documentation, you can pick up ready-to-go
% copies in both pdf and html format from:
%
% http://biopython.org/DIST/docs/tutorial/Tutorial.html
% http://biopython.org/DIST/docs/tutorial/Tutorial.pdf
%
% If you want to typeset the documentation, you'll need a standard TeX/LaTeX
% distribution (I use teTeX, which works great for me on Unix platforms).
% Additionally, you need HeVeA (or at least hevea.sty), which can be
% found at:
%
% http://pauillac.inria.fr/~maranget/hevea/index.html
%
% You will also need the pictures included in the document, some of
% which are UMLish diagrams created by Dia
% (http://www.lysator.liu.se/~alla/dia/dia.html).
% These diagrams are available from Biopython git in the original dia
% format, which you can easily save as .png format using Dia itself.
% They are also checked in as the png files, so if you make
% modifications to the original dia files, the png files should also be
% changed.
%
% Once you're all set, you should be able to generate pdf by running:
%
% pdflatex Tutorial.tex  (to generate the first draft)
% pdflatex Tutorial.tex  (to get the cross references right)
% pdflatex Tutorial.tex  (to get the table of contents right)
%
% To generate the html, you'll need HeVeA installed. You should be
% able to just run:
%
% hevea -fix Tutorial.tex
%
% However, on older versions of hevea you may first need to remove the
% Tutorial.aux file generated by LaTeX, then run hevea twice to get
% the references right.
%
% If you want to typeset this and have problems, please get in touch via
% the mailing list or GitHub, and we'll try to get things resolved. We
% always love to have people interested in the documentation!

\documentclass{report}
\usepackage{url}
\usepackage{fullpage}
\usepackage{hevea}
\usepackage{graphicx}
\usepackage{listings}

% make everything have section numbers
\setcounter{secnumdepth}{4}

% Make links between references
\usepackage{hyperref}
\newif\ifpdf
\ifx\pdfoutput\undefined
  \pdffalse
\else
  \pdfoutput=1
  \pdftrue
\fi
\ifpdf
  \hypersetup{colorlinks=true, hyperindex=true, citecolor=red, urlcolor=blue}
\fi

\begin{document}

\begin{htmlonly}
\title{Biopython Tutorial and Cookbook}
\end{htmlonly}
\begin{latexonly}
\title{
%Hack to get the logo on the PDF front page:
\includegraphics[width=\textwidth]{images/biopython_logo.pdf}\\
%Hack to get some white space using a blank line:
~\\
Biopython Tutorial and Cookbook}
\end{latexonly}

\author{Jeff Chang, Brad Chapman, Iddo Friedberg, Thomas Hamelryck, \\
Michiel de Hoon, Peter Cock, Tiago Antao, Eric Talevich, Bartek Wilczy\'{n}ski}
\date{Last Update -- 18 December 2018 (Biopython 1.74.dev0)}

%Hack to get the logo at the start of the HTML front page:
%(hopefully this isn't going to be too wide for most people)
\begin{rawhtml}
<P ALIGN="center">
<IMG ALIGN="center" SRC="images/biopython_logo.svg" TITLE="Biopython Logo" ALT="[Biopython Logo]" width="450" height="300" />
</p>
\end{rawhtml}

\maketitle
\tableofcontents

\include{Tutorial/chapter_introduction}
\include{Tutorial/chapter_quick_start}
\include{Tutorial/chapter_seq_objects}
\include{Tutorial/chapter_seq_annot}
\include{Tutorial/chapter_seqio}
\include{Tutorial/chapter_align}
\include{Tutorial/chapter_blast}
\include{Tutorial/chapter_searchio}
\include{Tutorial/chapter_entrez}
\include{Tutorial/chapter_uniprot}
\include{Tutorial/chapter_pdb}
\include{Tutorial/chapter_popgen}
\include{Tutorial/chapter_phylo}
\include{Tutorial/chapter_motifs}
\include{Tutorial/chapter_cluster}
\include{Tutorial/chapter_learning}
\include{Tutorial/chapter_graphics}
\include{Tutorial/chapter_kegg}
\include{Tutorial/chapter_phenotype}
\include{Tutorial/chapter_cookbook}
\include{Tutorial/chapter_testing}
\include{Tutorial/chapter_advanced}
\include{Tutorial/chapter_contributing}
\include{Tutorial/chapter_appendix}

\begin{thebibliography}{99}
\bibitem{cock2009}
Peter J. A. Cock, Tiago Antao, Jeffrey T. Chang, Brad A. Chapman, Cymon J. Cox, Andrew Dalke, Iddo Friedberg, Thomas Hamelryck, Frank Kauff, Bartek Wilczynski, Michiel J. L. de Hoon: ``Biopython: freely available Python tools for computational molecular biology and bioinformatics''. {\it Bioinformatics} {\bf 25} (11), 1422--1423 (2009).
\url{https://doi.org/10.1093/bioinformatics/btp163}
\bibitem{pritchard2006}
Leighton Pritchard, Jennifer A. White, Paul R.J. Birch, Ian K. Toth: ``GenomeDiagram: a python package for the visualization of large-scale genomic data''.  {\it Bioinformatics} {\bf 22} (5): 616--617 (2006).
\url{https://doi.org/10.1093/bioinformatics/btk021}
\bibitem{toth2006}
Ian K. Toth, Leighton Pritchard, Paul R. J. Birch: ``Comparative genomics reveals what makes an enterobacterial plant pathogen''. {\it Annual Review of Phytopathology} {\bf 44}: 305--336 (2006).
\url{https://doi.org/10.1146/annurev.phyto.44.070505.143444}
\bibitem{vanderauwera2009}
G\'eraldine A. van der Auwera, Jaroslaw E. Kr\'ol, Haruo Suzuki, Brian Foster, Rob van Houdt, Celeste J. Brown, Max Mergeay, Eva M. Top: ``Plasmids captured in C. metallidurans CH34: defining the PromA family of broad-host-range plasmids''.
\textit{Antonie van Leeuwenhoek} {\bf 96} (2): 193--204 (2009).
\url{https://doi.org/10.1007/s10482-009-9316-9}
\bibitem{proux2002}
Caroline Proux, Douwe van Sinderen, Juan Suarez, Pilar Garcia, Victor Ladero, Gerald F. Fitzgerald, Frank Desiere, Harald Br\"ussow:
``The dilemma of phage taxonomy illustrated by comparative genomics of Sfi21-Like Siphoviridae in lactic acid bacteria''.  \textit{Journal of Bacteriology} {\bf 184} (21): 6026--6036 (2002).
\url{https://doi.org/10.1128/JB.184.21.6026-6036.2002}
\bibitem{jupe2012}
Florian Jupe, Leighton Pritchard, Graham J. Etherington, Katrin MacKenzie, Peter JA Cock, Frank Wright, Sanjeev Kumar Sharma1, Dan Bolser, Glenn J Bryan, Jonathan DG Jones, Ingo Hein: ``Identification and localisation of the NB-LRR gene family within the potato genome''. \textit{BMC Genomics} {\bf 13}: 75 (2012).
\url{https://doi.org/10.1186/1471-2164-13-75}
\bibitem{cock2010}
Peter J. A. Cock, Christopher J. Fields, Naohisa Goto, Michael L. Heuer, Peter M. Rice: ``The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants''.  \textit{Nucleic Acids Research} {\bf 38} (6): 1767--1771 (2010). \url{https://doi.org/10.1093/nar/gkp1137}
\bibitem{brown1999}
Patrick O. Brown, David Botstein: ``Exploring the new world of the genome with DNA microarrays''. \textit{Nature Genetics} {\bf 21} (Supplement 1), 33--37 (1999). \url{https://doi.org/10.1038/4462}
\bibitem{talevich2012}
Eric Talevich, Brandon M. Invergo, Peter J.A. Cock, Brad A. Chapman: ``Bio.Phylo: A unified toolkit for processing, analyzing and visualizing phylogenetic trees in Biopython''.  \textit{BMC Bioinformatics} {\bf 13}: 209 (2012).
\url{https://doi.org/10.1186/1471-2105-13-209}
\bibitem{cornish1985}
Athel Cornish-Bowden: ``Nomenclature for incompletely specified bases in nucleic acid sequences: Recommendations 1984.'' \textit{Nucleic Acids Research} {\bf 13} (9): 3021--3030 (1985).
\url{https://doi.org/10.1093/nar/13.9.3021}
\bibitem{cavener1987}
Douglas R. Cavener: ``Comparison of the consensus sequence flanking translational start sites in Drosophila and vertebrates.'' \textit{Nucleic Acids Research} {\bf 15} (4): 1353--1361 (1987).
\url{https://doi.org/10.1093/nar/15.4.1353}
\bibitem{bailey1994}
Timothy L. Bailey and Charles Elkan: ``Fitting a mixture model by expectation maximization to discover motifs in biopolymers'', \textit{Proceedings of the Second International Conference on Intelligent Systems for Molecular Biology} 28--36. AAAI Press, Menlo Park, California (1994).
\bibitem{chapman2000}
Brad Chapman and Jeff Chang: ``Biopython: Python tools for computational biology''. \textit{ACM SIGBIO Newsletter} {\bf 20} (2): 15--19 (August 2000).
\bibitem{dehoon2004}
Michiel J. L. de Hoon, Seiya Imoto, John Nolan, Satoru Miyano: ``Open source clustering software''. \textit{Bioinformatics} {\bf 20} (9): 1453--1454 (2004).
\url{https://doi.org/10.1093/bioinformatics/bth078}
\bibitem{durbin1998}
Richard Durbin, Sean R. Eddy, Anders Krogh, Graeme Mitchison:
``Biological sequence analysis: Probabilistic models of proteins and nucleic acids''.
Cambridge University Press, Cambridge, UK (1998).
\bibitem{eisen1998}
Michiel B. Eisen, Paul T. Spellman, Patrick O. Brown, David Botstein: ``Cluster analysis and display of genome-wide expression patterns''. \textit{Proceedings of the National Academy of Science USA} {\bf 95} (25): 14863--14868 (1998). \url{https://doi.org/10.1073/pnas.96.19.10943-c}
\bibitem{golub1971}
Gene H. Golub, Christian Reinsch: ``Singular value decomposition and least squares solutions''. In \textit{Handbook for Automatic Computation}, {\bf 2}, (Linear Algebra) (J. H. Wilkinson and C. Reinsch, eds), 134--151. New York: Springer-Verlag (1971).
\bibitem{golub1989}
Gene H. Golub, Charles F. Van Loan: \textit{Matrix computations}, 2nd edition (1989).
\bibitem{hamelryck2003a}
Thomas Hamelryck and  Bernard Manderick: 11PDB parser and structure class
implemented in Python''. \textit{Bioinformatics}, \textbf{19} (17): 2308--2310 (2003) \url{https://doi.org/10.1093/bioinformatics/btg299}. 
\bibitem{hamelryck2003b}
Thomas Hamelryck: ``Efficient identification of side-chain patterns using a multidimensional index tree''. \textit{Proteins} {\bf 51} (1): 96--108 (2003).
\url{https://doi.org/10.1002/prot.10338}
\bibitem{hamelryck2005}
Thomas Hamelryck: ``An amino acid has two sides; A new 2D measure provides a different view of solvent exposure''. \textit{Proteins} {\bf 59} (1): 29--48 (2005).
\url{https://doi.org/10.1002/prot.20379}.
\bibitem{hartigan1975}
John A. Hartiga. \textit{Clustering algorithms}. New York: Wiley (1975).
\bibitem{hihara2001}
Yukako Hiharaa, Ayako Kameib, Minoru Kanehisac, Aaron Kapland and Masahiko Ikeuchib: ``DNA microarray analysis of cyanobacterial gene expression during acclimation to high light''. \textit{Plant Cell} {\bf 13} (4): 793--806 (2001). \url{https://doi.org/10.1105/tpc.13.4.793}.
\bibitem{jain1988}
Anil L. Jain, Richard C. Dubes: \textit{Algorithms for clustering data}. Englewood Cliffs, N.J.: Prentice Hall (1988).
\bibitem{kachitvichyanukul1988}
Voratas Kachitvichyanukul, Bruce W. Schmeiser: Binomial Random Variate Generation. \textit{Communications of the ACM} {\bf 31} (2): 216--222 (1988). \url{https://doi.org/10.1145/42372.42381}
\bibitem{kent2002}
W. James Kent: ``BLAT --- The BLAST-Like Alignment Tool''. \textit{Genome Research} {\bf 12}: 656--664 (2002). \url{https://doi.org/10.1101/gr.229202}
\bibitem{kohonen1997}
Teuvo Kohonen: ``Self-organizing maps'', 2nd Edition. Berlin; New York: Springer-Verlag (1997).
\bibitem{lecuyer1988}
Pierre L'Ecuyer: ``Efficient and Portable Combined Random Number Generators.''
\textit{Communications of the ACM} {\bf 31} (6): 742--749,774 (1988).
\url{https://doi.org/10.1145/62959.62969}
\bibitem{majumdar2005}
Indraneel Majumdar, S. Sri Krishna, Nick V. Grishin: ``PALSSE: A program to delineate linear secondary structural elements from protein structures.'' \textit{BMC Bioinformatics}, {\bf 6}: 202 (2005).
\url{https://doi.org/10.1186/1471-2105-6-202}.
\bibitem{matys2003}
V. Matys, E. Fricke, R. Geffers, E. G\"ossling, M. Haubrock, R. Hehl, K. Hornischer, D. Karas, A.E. Kel, O.V. Kel-Margoulis, D.U. Kloos, S. Land, B. Lewicki-Potapov, H. Michael, R. M\"unch, I. Reuter, S. Rotert, H. Saxel, M. Scheer, S. Thiele, E. Wingender E: ``TRANSFAC: transcriptional regulation, from patterns to profiles.'' Nucleic Acids Research {\bf 31} (1): 374--378 (2003).
\url{https://doi.org/10.1093/nar/gkg108}
\bibitem{saldanha2004}
Alok Saldanha: ``Java Treeview---extensible visualization of microarray data''. \textit{Bioinformatics} {\bf 20} (17): 3246--3248 (2004).
\url{https://doi.org/10.1093/bioinformatics/bth349}
\bibitem{sibson1973}
Robin Sibson: ``SLINK: An optimally efficient algorithm for the single-link cluster method''. \textit{The Computer Journal} {\bf 16} (1): 30--34 (1973).
\url{https://doi.org/10.1093/comjnl/16.1.30}
\bibitem{snedecor1989}
George W. Snedecor, William G. Cochran: \textit{Statistical methods}. Ames, Iowa: Iowa State University Press (1989).
\bibitem{tamayo1999}
Pablo Tamayo, Donna Slonim, Jill Mesirov, Qing Zhu, Sutisak Kitareewan, Ethan Dmitrovsky, Eric S. Lander, Todd R. Golub: ``Interpreting patterns of gene expression with self-organizing maps: Methods and application to hematopoietic differentiation''. \textit{Proceedings of the National Academy of Science USA} {\bf 96} (6): 2907--2912 (1999). \url{https://doi.org/10.1073/pnas.96.6.2907}
\bibitem{tryon1970}
Robert C. Tryon, Daniel E. Bailey: \textit{Cluster analysis}. New York: McGraw-Hill (1970).
\bibitem{tukey1977}
John W. Tukey: ``Exploratory data analysis''. Reading, Mass.: Addison-Wesley Pub. Co. (1977).
\bibitem{waterman1987}
Michael S. Waterman, Mark Eggert: ``A new algorithm for best subsequence alignments with application to tRNA-rRNA comparisons'', \textit{Journal of Molecular Biology} {\bf 197} (4): 723--728 (1987). \url{https://doi.org/10.1016/0022-2836(87)90478-5}
\bibitem{yeung2001}
Ka Yee Yeung, Walter L. Ruzzo: ``Principal Component Analysis for clustering gene expression data''. \textit{Bioinformatics} {\bf 17} (9): 763--774 (2001).
\url{https://doi.org/10.1093/bioinformatics/17.9.763}
\end{thebibliography}
\end{document}

